Database configuration

# Database connection settings.
# Each value can be overridden through an environment variable so that
# credentials do not have to be edited into (or committed with) this file;
# when the variable is unset, the previous hard-coded value is used.
# NOTE(review): nothing in the visible part of this file reads these
# variables directly -- confirm which downstream code consumes them.
sqlHost <- Sys.getenv("NHANES_SQL_HOST", unset = "localhost")
sqlUserName <- Sys.getenv("NHANES_SQL_USER", unset = "sa")
sqlPassword <- Sys.getenv("NHANES_SQL_PASSWORD", unset = "yourStrong(!)Password")
sqlDefaultDb <- Sys.getenv("NHANES_SQL_DB", unset = "NhanesLandingZone")

Load libraries

library(naniar)
library(visdat)
library(corrplot)
library(phonto)

Load data

# Fetch the NHANES table "DEMO_E" through phonto and keep it as `demo`;
# every later step in this document works on this object.
demo <- phonto::nhanes("DEMO_E")

# Render the table as an interactive DT widget.
DT::datatable(data = demo)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

Missingness of each variable across the levels of another variable (e.g., age)

# Plot missingness in every column of `demo`, broken down by RIDAGEYR.
gg_miss_fct(x = demo, fct = RIDAGEYR)

Show the percentage of missing values per variable

# Per-variable missingness, reported as a percentage (show_pct = TRUE).
gg_miss_var(x = demo, show_pct = TRUE)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

Plot the missing-data pattern

# Visualise which cells of `demo` are missing; the large-data warning is
# explicitly switched off.
vis_miss(x = demo, warn_large_data = FALSE)

Correlation of missingness between variables

# Count the missing values in every column of `demo`.
# vapply() pins the result to a named integer vector; sapply() would
# return a list instead when `demo` has no columns.
missing_cols <- vapply(demo, function(x) sum(is.na(x)), integer(1))

# Keep only the columns that have at least one missing value.
missing_cols <- missing_cols[missing_cols > 0]
missing_cols
## RIDEXMON RIDAGEMN RIDAGEEX DMQMILIT DMDCITZN DMDYRSUS DMDEDUC3 DMDEDUC2 
##      387      439      782     3778        6     8439     7649     4214 
## DMDSCHOL DMDMARTL INDHHIN2 INDFMIN2 INDFMPIR RIDEXPRG DMDHRBR2 DMDHREDU 
##     7879     4214       77       94      894     8941      268      266 
## DMDHRMAR DMDHSEDU  SIALANG SIAPROXY SIAINTRP  FIALANG FIAPROXY FIAINTRP 
##      341     4738        1        1        1      128      128      128 
##  MIALANG MIAPROXY MIAINTRP  AIALANG 
##     3003     3003     3003     2415
# Build a logical missingness-indicator matrix (TRUE = NA) for the columns
# listed in `missing_cols`. drop = FALSE keeps the two-dimensional shape
# even when only a single column qualifies; without it the selection
# collapses to a vector and cor() fails.
missing_flags <- is.na(demo[, names(missing_cols), drop = FALSE])

# A column that is missing in every row has a constant indicator (zero
# variance), for which cor() yields NA and the "hclust" ordering below
# cannot be computed, so such columns are left out.
missing_flags <- missing_flags[
  ,
  colSums(missing_flags) < nrow(missing_flags),
  drop = FALSE
]

# Pairwise correlation between the missingness indicators.
correlation <- cor(missing_flags)

# Upper-triangle correlation plot, variables ordered by hierarchical
# clustering, black labels rotated by 45 degrees.
corrplot(correlation, type = "upper", order = "hclust",
         tl.col = "black", tl.srt = 45)